11 Dependencies

Code
library(readr)
library(ggplot2)
# install.packages("datasauRus")
library(datasauRus) 
library(scales)
library(dplyr)
library(tidyr)
library(plotrix) 

# install.packages("devtools")
# devtools::install_github("matthewbjane/ThemePark")
library(ThemePark)
library(patchwork)
library(janitor)
library(knitr)
library(kableExtra)

12 Why plot data?

Summary statistics aren’t enough!

Code
# M and SD of x and y within each dataset, rounded to 2 decimal places
datasaurus_dozen |>
  group_by(dataset) |>
  summarize(mean_x = mean(x),
            sd_x = sd(x),
            mean_y = mean(y),
            sd_y = sd(y)) |>
  # across() replaces the superseded mutate_if(); every numeric column is rounded
  mutate(across(where(is.numeric), \(value) round_half_up(value, digits = 2))) |>
  # print as a right-aligned table that does not stretch to the full page width
  kable(align = "r") |>
  kable_classic(full_width = FALSE)
dataset mean_x sd_x mean_y sd_y
away 54.27 16.77 47.83 26.94
bullseye 54.27 16.77 47.83 26.94
circle 54.27 16.76 47.84 26.93
dino 54.26 16.77 47.83 26.94
dots 54.26 16.77 47.84 26.93
h_lines 54.26 16.77 47.83 26.94
high_lines 54.27 16.77 47.84 26.94
slant_down 54.27 16.77 47.84 26.94
slant_up 54.27 16.77 47.83 26.94
star 54.27 16.77 47.84 26.93
v_lines 54.27 16.77 47.84 26.94
wide_lines 54.27 16.77 47.83 26.94
x_shape 54.26 16.77 47.84 26.93
Code
# correlation between x and y within each dataset, rounded to 2 decimal places
datasaurus_dozen |>
  group_by(dataset) |>
  summarize(correlation = cor(x, y)) |>
  # across() replaces the superseded mutate_if(); every numeric column is rounded
  mutate(across(where(is.numeric), \(value) round_half_up(value, digits = 2))) |>
  # print as a right-aligned table that does not stretch to the full page width
  kable(align = "r") |>
  kable_classic(full_width = FALSE)
dataset correlation
away -0.06
bullseye -0.07
circle -0.07
dino -0.06
dots -0.06
h_lines -0.06
high_lines -0.07
slant_down -0.07
slant_up -0.07
star -0.06
v_lines -0.07
wide_lines -0.07
x_shape -0.07

Always plot your data!

Code
# scatter plot of y against x, drawn in a separate panel for each dataset
datasaurus_dozen |>
  ggplot(mapping = aes(x, y)) +
  geom_point() +
  facet_wrap(vars(dataset)) +
  theme_minimal()

13 Structure of a ggplot

Whereas the pipes (%>% and |>) are used to create tidy data wrangling and analysis workflows, ggplot functions are added together with +.

Function calls are applied in order as layers. Changing the order in which the functions are called can therefore change the appearance of the plot.

Code
# load the processed data set
data_processed <- read_csv(file = "../data/processed/data_processed.csv")

# apply exclusions: a row is kept only if every comma-separated condition holds
data_after_exclusions <- filter(
  data_processed,
  exclude_amp == "include",
  n_items == 3,
  gender %in% c("male", "female")
)
Code
# display labels shared by the color and shape scales
gender_labels <- c(female = "Female", male = "Male")

plot_1 <- data_after_exclusions |>
  # data and aesthetic mappings, inherited by the layers below
  ggplot(mapping = aes(
    x = mean_self_report,
    y = amp_score,
    color = gender,
    shape = gender
  )) +
  # reference lines at fixed positions, drawn first so they sit underneath the data
  geom_vline(xintercept = 4, linetype = "dotted") +
  geom_hline(yintercept = 0.5, linetype = "dotted") +
  # geoms that use the mapped aesthetics (x, y, color and shape)
  ## one point per row
  geom_point() +
  ## one fitted linear model per gender
  geom_smooth(method = "lm") +
  # axis titles and breaks
  scale_x_continuous(
    name = "Explicit evaluation\n(Self-report)",
    breaks = scales::breaks_pretty(n = 7)
  ) +
  scale_y_continuous(name = "Implicit evaluation\n(AMP)") +
  # base theme
  theme_linedraw() +
  # plot title and legend titles
  labs(
    title = "Scatter plot with linear regression lines",
    color = "Gender",
    shape = "Gender"
  ) +
  # colors used for each gender
  scale_color_manual(
    values = c(female = "#FF69B4", male = "#6495ED"),
    labels = gender_labels
  ) +
  # point shapes used for each gender
  scale_shape_manual(
    values = c(female = 16, male = 17),
    labels = gender_labels
  ) +
  # zoom to specific x and y ranges without dropping data points
  # (nb `limits` in a scale drops data points, coord_cartesian() does not)
  coord_cartesian(xlim = c(1, 7), ylim = c(0, 1))

# display plot below chunk
plot_1

Code
# write plot_1 to disk; the file extension (.pdf) determines the output format
ggsave(filename = "plots/plot_1.pdf", plot = plot_1, width = 6, height = 5)

Note that you can add additional function calls to objects later, e.g., overriding the previous theme_ call with a new one:

Code
# add a new theme call to the stored plot; this replaces the theme_linedraw() look
# for this printed copy only, because the result is not assigned back to plot_1
# (theme_barbie() presumably comes from the ThemePark package loaded above)
plot_1 + theme_barbie()

14 Histogram using geom_histogram()

14.1 Simple plot for self-reports

Code
# minimal histogram of the self-report means using 1-unit-wide bins
data_after_exclusions |>
  ggplot(mapping = aes(mean_self_report)) +
  geom_histogram(binwidth = 1)

14.2 Slightly better plot for self-reports

Code
data_after_exclusions |>
  ggplot(mapping = aes(x = mean_self_report)) +
  # 1-unit-wide bins with an edge at 0.5, so each bin is centered on a whole number
  geom_histogram(boundary = 0.5, binwidth = 1) +
  # x axis: about 7 evenly spaced breaks, with the axis restricted to 0.5-7.5
  scale_x_continuous(
    limits = c(0.5, 7.5),
    breaks = scales::breaks_pretty(n = 7)
  ) +
  # y axis: a break every 10 counts
  scale_y_continuous(breaks = seq(from = 0, to = 60, by = 10)) +
  theme_minimal()

14.3 Exercise: Plot for gender

Create a similar plot for the gender variable in data_processed (i.e., before exclusions).

14.4 Exercise: Plot for AMP

Create a similar plot for the AMP scores in data_after_exclusions.

Code
# sample mean of the AMP scores, used to position the vertical reference line
mean_amp <- data_after_exclusions |>
  summarize(mean_amp = mean(amp_score)) |>
  pull(mean_amp)


# histogram of AMP scores with the sample mean marked
plot_amp <- 
  ggplot(data = data_after_exclusions,
         aes(x = amp_score)) +
  geom_histogram(binwidth = 0.1) +
  # x axis: a break every 0.1 between 0 and 1
  scale_x_continuous(breaks = seq(0, 1, .10),
                     name = "AMP score") +
  # y axis: a break every 5 counts
  scale_y_continuous(breaks = seq(0, 40, 5),
                     name = "Frequency") +
  # dashed (not dotted) vertical line at the sample mean, as the exercise asks for
  geom_vline(xintercept = mean_amp, linetype = "dashed") +
  theme_linedraw()

# display plot below chunk
plot_amp

Code
# write plot_amp to disk; the file extension (.pdf) determines the output format
ggsave(filename = "plots/plot_amp.pdf", plot = plot_amp, width = 6, height = 5)
  • Exercise: How would you add a dashed vertical line at the sample’s mean AMP score?

15 Density plot using geom_density()

15.1 Simple plot for self-reports

Code
data_after_exclusions |>
  ggplot(mapping = aes(x = mean_self_report)) +
  # filled, semi-transparent density curve
  geom_density(
    fill = "darkblue",
    color = "#FF69B4",
    alpha = 0.3,
    adjust = 1 # multiplier on the default bandwidth; change it to adjust the smoothing
  ) +
  # x axis: about 7 evenly spaced breaks, with the axis restricted to 1-7
  scale_x_continuous(
    limits = c(1, 7),
    breaks = scales::breaks_pretty(n = 7)
  ) +
  theme_minimal()

15.2 Exercise: Plot for AMP

Make a similar density plot for the AMP.

  • Add a theme.
  • Make the X axis breaks prettier.
  • Label both axes more clearly.

16 Bar plot using geom_col()

Bar plots are bad and usually shouldn’t be used. But they are sometimes unavoidable, so here’s how to make them.

16.1 Simple plot for AMP

Code
# per-gender mean and standard error of the AMP scores: the values to be plotted
summary_amp <- data_after_exclusions |>
  group_by(gender) |>
  summarize(
    amp_mean = mean(amp_score),
    amp_se = plotrix::std.error(amp_score)
  )

# one bar per gender, with a vertical line spanning mean - SE to mean + SE
summary_amp |>
  ggplot(mapping = aes(x = gender, y = amp_mean)) +
  # NB geom_col() is equivalent to geom_bar(stat = "identity")
  geom_col() +
  geom_linerange(mapping = aes(ymin = amp_mean - amp_se,
                               ymax = amp_mean + amp_se))

16.2 Slightly better plot for AMP

Code
# bar plot of the per-gender mean AMP score with error bars of +/- 1 standard error
ggplot(data = summary_amp, 
       aes(x = gender, 
           y = amp_mean)) +
  geom_col(fill = "#0b6623", # note that you can specify specific colors using hex codes or names
           color = "black", 
           width = 0.6) +
  # error bars span mean - SE to mean + SE; `width` sets the width of the end caps
  geom_errorbar(aes(ymin = amp_mean - amp_se, 
                    ymax = amp_mean + amp_se), 
                width = 0.1, 
                color = "black") +
  # title previously read "Bar Plot of with Standard Errors" (missing words)
  labs(title = "Bar plot of mean AMP scores with standard errors",
       x = "Gender",
       y = "Mean AMP score") +
  theme_linedraw() 

16.3 Exercise: Plot for self-reports

Make a similar plot for the self-reports.

  • Use coord_flip() to swap the X and Y axes.
  • Exercise: How would you capitalize ‘Male’ and ‘Female’ by wrangling the data before plotting?

17 Combining plots

Code
# scatter plot with a linear fit: whole sample
plot_all <- 
  ggplot(data = data_after_exclusions,
         mapping = aes(x = mean_self_report, y = amp_score)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ggtitle("All")

# same plot, restricted to rows where gender is "female"
plot_women <- 
  ggplot(data = filter(data_after_exclusions, gender == "female"),
         mapping = aes(x = mean_self_report, y = amp_score)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ggtitle("Women")

# same plot, restricted to rows where gender is "male"
plot_men <- 
  ggplot(data = filter(data_after_exclusions, gender == "male"),
         mapping = aes(x = mean_self_report, y = amp_score)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ggtitle("Men")

# combine the stored plots into one figure; further arrangements follow below
plot_women + plot_men

Code
# same two plots, but arranged in a single column via plot_layout()
plot_women + plot_men + plot_layout(ncol = 1)

Code
# nested arrangement: plot_all above the combined plot_women and plot_men
plot_all / (plot_women + plot_men)

18 Faceting plots

Without repeating yourself, you can also make a plot for different subsets using facet_wrap() or facet_grid().

Code
# scatter plot with a linear fit, drawn in a separate panel for each gender
data_after_exclusions |>
  ggplot(mapping = aes(x = mean_self_report, y = amp_score)) +
  geom_point() +
  geom_smooth(method = "lm") +
  facet_wrap(vars(gender))

18.1 Exercise

Create a plot that assesses the association between self-report scores and AMP scores. Wrangle data_processed further before plotting, then use facet_grid() to compare a) men vs. women and b) participants who are 30 or older vs. those younger than 30.

Improve the appearance of the plot, including its text, colors, theme, etc.

19 Session info

Code
# print the R version, platform and package versions used for this session
sessionInfo()
R version 4.5.0 (2025-04-11)
Platform: aarch64-apple-darwin20
Running under: macOS Sequoia 15.5

Matrix products: default
BLAS:   /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRblas.0.dylib 
LAPACK: /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.1

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

time zone: Europe/Zurich
tzcode source: internal

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] kableExtra_1.4.0 knitr_1.50       janitor_2.2.1    patchwork_1.3.0 
 [5] ThemePark_0.0.1  plotrix_3.8-4    tidyr_1.3.1      dplyr_1.1.4     
 [9] scales_1.4.0     datasauRus_0.1.9 ggplot2_3.5.2    readr_2.1.5     

loaded via a namespace (and not attached):
 [1] generics_0.1.4     xml2_1.3.8         lattice_0.22-6     stringi_1.8.7     
 [5] hms_1.1.3          digest_0.6.37      magrittr_2.0.3     evaluate_1.0.3    
 [9] grid_4.5.0         timechange_0.3.0   RColorBrewer_1.1-3 sysfonts_0.8.9    
[13] showtextdb_3.0     fastmap_1.2.0      Matrix_1.7-3       jsonlite_2.0.0    
[17] mgcv_1.9-1         purrr_1.1.0        viridisLite_0.4.2  textshaping_1.0.1 
[21] cli_3.6.5          crayon_1.5.3       rlang_1.1.6        splines_4.5.0     
[25] bit64_4.6.0-1      withr_3.0.2        yaml_2.3.10        parallel_4.5.0    
[29] tools_4.5.0        tzdb_0.5.0         showtext_0.9-7     curl_6.4.0        
[33] vctrs_0.6.5        R6_2.6.1           lifecycle_1.0.4    lubridate_1.9.4   
[37] snakecase_0.11.1   stringr_1.5.1      bit_4.6.0          htmlwidgets_1.6.4 
[41] vroom_1.6.5        ragg_1.4.0         pkgconfig_2.0.3    pillar_1.11.0     
[45] gtable_0.3.6       glue_1.8.0         systemfonts_1.2.3  xfun_0.52         
[49] tibble_3.3.0       tidyselect_1.2.1   rstudioapi_0.17.1  farver_2.1.2      
[53] nlme_3.1-168       htmltools_0.5.8.1  rmarkdown_2.29     svglite_2.2.1     
[57] labeling_0.4.3     compiler_4.5.0